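*This file takes the cleaned monthly data from 1.child.do and builds a regression dataset with one record per child.
*Note: the "1" and "2" are the two positional arguments passed to this do-file. The "1" is how many months after the initial diagnosis we need to be able to follow the child (children observed for fewer months are dropped), and the "2" is how many months of treatment after the initial diagnosis are counted as "initial."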

********************************************************************************
*DEFINE DIRECTORIES
*two root folders are set by hand; the project sub-folders are derived from them
local home CHILD
local network NETWORK
local main `home'/JPE
local logs `main'/logs
local data `main'/data
local results `main'/results
********************************************************************************

*start in the home folder; the first datasets below are opened by bare file name
cd "`home'"

*check the two positional arguments before touching the data
*  first argument  = minimum number of months after diagnosis a child must be observed
*  second argument = last month after diagnosis that is kept as initial treatment
*without them the keep conditions further down expand to incomplete expressions
*and fail with an uninformative syntax error
if "`1'"=="" | "`2'"=="" {
	display as error "two numeric arguments are required: months of follow-up, then months of initial treatment"
	exit 198
}
confirm number `1'
confirm number `2'

*bring in data
use full_data.dta, clear

*keep the 6-month pre and indefinitely after
* everybody is observed 6 months before
keep if months_after_diagnosis>=-6

*flag one observation per person
*(rows are only sorted by patientid, so which row gets the flag is arbitrary)
*NOTE(review): i is not used again and is discarded by the later collapse
bysort patientid: generate i=_n==1

*generate age at diagnosis
*divide by 12 and round down; presumably months to whole years - confirm units
*NOTE(review): this variable is also discarded by the later collapse
replace age_at_diagnosis = age_at_diagnosis/12
replace age_at_diagnosis = floor(age_at_diagnosis)

*determine how long we see people after diagnosis
*max is the last observed month for each child; keep children followed for at
*least as many months as the first argument
bysort patientid: egen max = max(months_after)
keep if max>=`1'

*restrict to the pre-period plus the initial-treatment window
*(months after the second argument are dropped; the collapse itself comes later)
keep if months_after<=`2'

*generate preperiod stuff
*pre marks rows before the diagnosis month
generate pre = months_after<0

*for each measure build two child-level constants:
*  pre_X     = total of X over the pre-period rows
*  pre_X_max = largest single pre-period value of X
foreach var of varlist mh_spend non_mh_spend total_spend i_hospital i_er {
	* value of the measure on pre-period rows, 0 on all other rows
	generate temp`var'=pre*`var'
	* total() is the documented name of the egen function formerly called sum()
	by patientid, s: egen pre_`var'=total(temp`var')
	* blank out the non-pre rows so they cannot win the maximum
	replace temp`var'=. if pre==0
	by patientid, s: egen pre_`var'_max = max(temp`var')
	}
drop temp*

*the wildcards remove the raw and the pre-period versions of mh_spend,
*non_mh_spend and i_er, leaving only the total_spend and i_hospital summaries
drop *mh_spend* *_er* pre

*collapse into single records
*row-level count of the three treatment flags, summed within child by the collapse;
*the pre-period summaries and first_mh are carried along through their maximum
egen treat = rowtotal(i_therapy i_otreat i_drug)
collapse (sum) treat (max) pre* first_mh, by(patientid)

*recode to 1's
*anything above 1 is capped at 1
replace treat = min(treat,1)

*merge in demographics
*children without a demographics record are kept; demographics-only records are not
merge 1:1 patientid using ccc_pull_demographics.dta, keep(master match) nogenerate

*generate age at diagnosis
*date is the month containing first_mh; age is the number of whole years between
*the birth month and that month
generate date = mofd(first_mh)
format date %tm
generate age = floor((date - mofd(bday))/12)
drop bday

*merge in zcta
*match on child and month of first_mh; only matched children are kept
merge 1:1 patientid date using ccc_pull_coverage.dta, keep(match) nogenerate

*mbrs is used as the zip code key for the crosswalk merge
rename mbrs zip

*convert to ZCTAs
*load the zip-to-ZCTA spreadsheet into a temporary file while the child data are
*set aside; the working directory stays on the network folder after the restore
tempfile crosswalk
preserve
cd "`network'"
import excel using PRINCETON_06192019.zip_to_zcta_2018.xlsx, firstrow clear
rename ZIP_CODE zip
keep zip ZCTA
destring zip ZCTA, replace
save `crosswalk', replace
restore

*only children whose zip appears in the crosswalk are kept
merge m:1 zip using `crosswalk', keep(match) nogenerate
rename ZCTA zcta_kid
*calendar year of first_mh, used as the second merge key below
generate year = yofd(first_mh)

*merge in market level measures
*the key is converted to a zero-padded 5-character string before merging
tostring zcta_kid, format(%05.0f) replace
cd "`home'"
merge m:1 zcta_kid year using market-measures.dta
keep if _merge==3
drop _merge

*generate location zip3
*zcta_kid is spelled out in full: the abbreviation zcta only resolves while no
*other variable name starts with those letters, which the merge above can change
generate zip3 = substr(zcta_kid,1,3)

*define pre-spending deciles
*one set from total pre-period spending, one from the largest single pre-period value
xtile spending = pre_total_spend, nquantiles(10)
xtile max_spending = pre_total_spend_max, nquantiles(10)

*recode pre-hospitalizations
*1 when the pre-period hospital total is above zero
generate in_hospital_pre = (pre_i_hospital>0)

*parse variables
keep patientid treat first_* female age zcta_kid year ///
	share_mh share_gp zip3 *spending* in_hospital_pre share_bad
*rename bad share_bad

*save regression data
*written to the home folder
cd "`home'"
save "regression_data.dta", replace

*incorporate the TCA/benzo and FDA definitions of bad prescribing

*bring in data***************************************************************
*read from the current working directory (the home folder at this point)
import delimited ccc_pull_mhdrugs.txt, varnames(1) clear

*format date
*serv_dt is parsed as year-month-day text into a daily date
generate date = date(serv_dt,"YMD")
format date %td
drop serv_dt

*fix up product code
*write the code as an 11-character zero-padded string and keep its first 9 characters
tostring prod_serv_id_cd, generate(ndc11) format(%011.0f)
generate ndc9 = substr(ndc11,1,9)
drop ndc11 prod_serv_id_cd

*id active ingredient
*the lookup file is on the network folder; the working directory is left there
destring ndc9, replace
cd "`network'"
merge m:1 ndc9 using child_index_mini.dta, keep(master match) nogenerate
drop opioid

*one birth date per child, taken from the demographics text file
*(read from the current working directory, which is the network folder here)
tempfile births
preserve
import delimited ccc_pull_demographics.txt, varnames(1) clear
keep patientid memb_brth_dt
bysort patientid: keep if _n==1
save `births', replace
restore

*only claims for children with a demographics record are kept
merge m:1 patientid using `births', keep(match) nogenerate

*age in whole years at the month of the claim
generate birth = date(memb_brth_dt,"YMD")
format birth %td
generate age = floor((mofd(date)-mofd(birth))/12)

*flag "fda ok" prescribing
*a claim is good when its active ingredient is on the list below and the child
*meets the age condition attached to that ingredient; ingredients are grouped by
*age condition
*NOTE(review): a missing age compares as larger than any number, so the >=
*conditions hold when age is missing - confirm that is intended
generate good = ///
	(active=="risperidone" & age>=5) | ///
	(inlist(active,"sertraline","aripiprazole","trifluoperazine") & age>=6) | ///
	(inlist(active,"duloxetine","fluoxetine") & age>=7) | ///
	(active=="fluvoxamine" & age>=8) | ///
	(inlist(active,"clomipramine","olanzapine-fluoxetine","asenapine","olanzapine","quetiapine") & age>=10) | ///
	(inlist(active,"escitalopram","loxapine","perphenazine","pimozide","thiothixene") & age>=12) | ///
	(active=="chlorpromazine" & age<=12) | ///
	(active=="prochlorperazine" & age>2) | ///
	(active=="thioridazine")

*every claim that is not good counts as bad, including ingredients not on the list
generate bad = !good

*attach first_mh for each child from the regression data
*regression_data.dta is saved in the home folder earlier in this file, but the
*working directory was moved to the network folder before the child_index_mini
*merge and never moved back, so the file is opened by its full home path
tempfile firstmh
preserve
use patientid first_mh using "`home'/regression_data.dta", clear
by patientid, s: keep if _n==1
save `firstmh', replace
restore

*only claims for children in the regression data are kept
merge m:1 patientid using `firstmh'
keep if _merge==3
drop _merge

*months between the claim and first_mh; keep claims from the month after first_mh
*through the month given by the second argument
*NOTE(review): claims in the month of first_mh itself (diff equal to 0) are
*excluded here - confirm that is intended
generate diff = mofd(date)-mofd(first_mh)
keep if diff>0 & diff<=`2'

*generate bad flag
*foreach var of varlist benzo tca polypharmacy polypharmacy2 {
foreach var of varlist bad {
replace `var'=0 if `var'==.
}

*number of bad claims per child within the window
*collapse (sum) benzo tca poly* bad, by(patientid)
collapse (sum) bad*, by(patientid)

*written to the current working directory and read back from there below
save badly_treated.dta, replace

*merge into regression data
*same full home path as above, for the same reason
use "`home'/regression_data.dta", clear
merge 1:1 patientid using badly_treated.dta
*replace kid_bad_fda = 0 if kid_bad_fda==.
*replace kid_bad_benzo_tca = 0 if kid_bad_benzo_tca==.
*children with no claims in the window get a count of zero
replace bad = 0 if bad==.
drop _merge

*disabled code that built outcomes.dta from full_data.dta, kept for reference
*preserve
*use patientid date first_mh total_spend i_drug mh_spend hospital_mh hospital_nonmh hospital i_er i_hospital time_in using full_data.dta, clear
*generate diff = date-mofd(first_mh)
*keep if diff>=0
*drop first_mh date
*reshape wide mh_spend- time_in, i(patientid) j(diff)
*save outcomes.dta, replace
*restore

*now, merge on outcome data
*only children present in both files are kept
*NOTE(review): outcomes.dta is opened from the current working directory, which
*is not reset in the lines above - confirm the file lives there
merge 1:1 patientid using outcomes.dta, keep(match) nogenerate

*destring zip3
destring zip3, replace

*save regression data
*the file name carries the two arguments; it is written to the main project folder
cd "`main'"
save "final_`1'after_`2'mon.dta", replace
